**********************************************************************************************************
**  7/2022
**
**	Data Construction for "Partial Default" by Cristina Arellano, Victor Rios-Rull, Xavier Mateos-Planas 
**
**  Databases and Variables
**		1) International Debt Statistics: External debt stocks, public and publicly guaranteed (PPG) (DOD, current US$): name as debtppg; 
**      Debt service on external debt, total (TDS, current US$): name as debtserv_ppg
**				-Link to database: https://www.worldbank.org/en/programs/debt-statistics/ids
**		2) World Development Indicators: GDP (constant 2010 US$): name as rgdp; GDP (current US$): name as ngdp;
**      Final consumption expenditure (constant 2010 US$): name as rconsumption
**				-Link to database: https://datatopics.worldbank.org/world-development-indicators/
** 		3) Debtor Reporting System: Interest arrears, official creditors (current US$) PPG: name as new_iaoppg; 
**      Interest arrears, private creditors (current US$) PPG: name as new_iapppg;
**		Principal arrears, official creditors (current US$) PPG: name as new_paoppg; 
**      Principal arrears, private creditors (current US$) PPG: name as new_papppg
**				-Data on arrears for interest and principal for PPG data is available via a data request 
**				from the World Bank Debtor Reporting System. This request can be done at the information 
**				portal at http://www.worldbank.org/en/access-to-information/request-submission, or from the Debt Statistics Team. 
**		4) Global Financial Database:  EMBI+ spread: name as spread  
**				-Link to database: https://finaeon.globalfinancialdata.com
**  Years Covered: 1970-2019
**  Countries:	Argentina, Bulgaria, Belize, Brazil,
** 				Chile, the Dominican Republic, Ecuador, Gabon, Ghana, Indonesia, Jamaica, Morocco, Mexico,
**				Nigeria, Pakistan, Panama, Peru, the Philippines, Poland, the Russian Federation, El Salvador,
**				Serbia, Trinidad and Tobago, Turkey, Ukraine, Uruguay, Venezuela, Vietnam, South Africa, China,
**				Colombia, Egypt, Hungary, Korea, Sri Lanka, Malaysia, and Tunisia.
********************************************************************************************************** 



clear all

// Root folder that holds dataset.dta. Edit this one line to match your machine;
// the commented-out line below is a macOS example.
*global datapath "/Users/apple/Downloads/Arellano/partial_default/clean_up"
global datapath "F:\Dropbox\defaulted debt\data\JPEdataupdate\final"

// File with raw panel data from the 4 sources listed in the header.
// The file name is joined to the folder with "/" rather than "\": Stata accepts
// a forward slash as a directory separator on Windows, macOS and Unix, whereas
// a backslash only works on Windows (and would break the macOS path above).
use "$datapath/dataset.dta", clear

// Declare the panel structure: wbcode is the country id, year the time variable.
tsset wbcode year 

// Partial default, built from the World Bank arrears series

* Defaulted debt: row-wise sum of every variable whose name starts with new_
* (per the file header these are the 4 PPG arrears series: interest and
* principal arrears owed to official and to private creditors).
* egen rowtotal() counts a missing component as 0.
egen double PPG_arrears = rowtotal(new_*)
label var PPG_arrears `"Defaulted PPG debt (current US$)"'

* Payments due = arrears + debt service
egen double paydue = rowtotal(PPG_arrears debtserv_ppg)
label var paydue `"PPG debt payments due (current US$)"'

* Iraq is excluded from the sample
drop if countryname == "Iraq"

* Partial default = arrears as a share of payments due.
* It is 0 when arrears are exactly 0 and is left missing otherwise
* (i.e. when arrears are negative, or positive with paydue equal to 0).
tsset wbcode year
generate partialdef = PPG_arrears/paydue if PPG_arrears > 0
replace  partialdef = 0                  if PPG_arrears == 0
label var partialdef `"Partial Default"'

sort wbstring year

// Detrended gdp
// ydt = residual from a country-specific OLS regression of log real GDP on year
// (i.e. the deviation of log output from its own linear trend).
gen ly=log(rgdp)
gen ydt=.
 
sort wbcode year

* Loop over the country codes actually present in the data instead of a
* hard-coded 1/118 range, so no country is skipped if the codes change.
quietly levelsof wbcode, local(ctry_codes)

* Temporary name for the residuals: cannot collide with an existing variable.
tempvar resid_y

foreach num of local ctry_codes {
	capture regress ly year if wbcode==`num'
	* If the regression could not be estimated (e.g. no or too few observations
	* of rgdp), leave ydt missing for this country. Calling predict here would
	* silently reuse the coefficients of the previously estimated country.
	if _rc {
		continue
	}
	* Residuals are only needed, and only meaningful, for this country's rows.
	quietly predict `resid_y' if wbcode==`num', residuals
	quietly replace ydt=`resid_y' if wbcode==`num'
	drop `resid_y'
}


//Detrended Consumption
// cdt = residual from a country-specific OLS regression of log real
// consumption on year (deviation of log consumption from its linear trend).
gen lc=log(rconsumption)
gen cdt=.
 
sort wbcode year

* Loop over the country codes actually present in the data instead of a
* hard-coded 1/118 range, so no country is skipped if the codes change.
quietly levelsof wbcode, local(ctry_codes)

* Temporary name for the residuals: cannot collide with an existing variable.
tempvar resid_c

foreach num of local ctry_codes {
	capture regress lc year if wbcode==`num'
	* If the regression could not be estimated (e.g. no or too few observations
	* of rconsumption), leave cdt missing for this country. Calling predict here
	* would silently reuse the coefficients of the previously estimated country.
	if _rc {
		continue
	}
	* Residuals are only needed, and only meaningful, for this country's rows.
	quietly predict `resid_c' if wbcode==`num', residuals
	quietly replace cdt=`resid_c' if wbcode==`num'
	drop `resid_c'
}



// Ratios to nominal GDP (each series is divided by ngdp)
generate debtppggdp   = debtppg      / ngdp    // PPG debt stock
generate arrgdp       = PPG_arrears  / ngdp    // arrears (defaulted payments)
generate payduegdp    = paydue       / ngdp    // payments due = arrears + debt service
generate debtserv_gdp = debtserv_ppg / ngdp    // debt service

label var debtppggdp   `"Debt to output"'
label var arrgdp       `"Defaulted coupon to output"'
label var payduegdp    `"Debt Due to output"'
label var debtserv_gdp `"Debt service to output"'



//compute default episode
// Outputs of this section:
//   parr   = 1 in country-years with partial default above 1%, else 0
//   ndef   = running year count inside an episode (1 = first year, 2 = second, ...),
//            0 outside episodes
//   length = total number of years of the episode, repeated on every year of it
tsset  wbcode year 
gen parr=0
* a missing partialdef compares as larger than any number, hence the explicit !=. guard
replace parr=1 if partialdef>0.01 & partialdef!=.
gen ndef=0

* create an index of default
* first year of an episode: in default now but not in the previous year
bysort wbcode: replace ndef=1 if parr==1 & l1.parr==0  & year != 1970  
* 1970 has no lagged value in a sample starting in 1970, so a default in 1970 is
* treated as the first year of an episode
bysort wbcode: replace ndef=1 if parr==1 & year == 1970 
* extend the count one year per pass: a default year following a year with
* count i-1 gets count i (60 passes, more than the 50 years 1970-2019)
qui forval i=1/60 {
bysort wbcode: replace ndef=`i' if l1.ndef==(`i'-1) & parr==1
}

* create the length of each default episode
gen length=.
* last year of an episode (no default next year): length equals the running count
bysort wbcode: replace length=ndef if ndef!=0 & f1.parr==0 
* episode still running in the country's last observation (f1.wbcode is missing
* there): length equals the count reached so far
bysort wbcode: replace length=ndef if ndef!=0 & f1.wbcode!=wbcode
* copy the length backwards one year per pass until every year of the episode
* carries it (49 passes cover an episode spanning all 50 years)
qui forval i=1/49 {
bysort wbcode: replace length=length[_n+1] if  ndef!=0 & f1.parr!=0
* the line above blanks the country's last observation (its [_n+1] does not
* exist), so restore it after every pass
bysort wbcode: replace length=ndef[_n] if  ndef!=0 & f1.wbcode!=wbcode
}


* Default episodes per country 
* Outputs: arrepi = within-country sequence number of the episode (1, 2, ...),
*          cn_epi = identifier combining country code and episode number.
* from here on ndef is missing (not 0) outside default episodes
replace ndef=. if ndef==0
tsset wbcode year
* nepi = number of episodes of the country (each episode start contributes
* ndef==1 to the sum); defined only on the rows that start an episode
by wbcode: egen nepi=sum(ndef) if ndef==1
* missing values sort last, so within each country the episode-start rows now
* come first, in chronological order
sort wbcode nepi year
gen arrepi=1 if ndef==1 
* running count down the sorted rows: the k-th episode start gets arrepi = k
* (values this writes on non-start rows are overwritten below)
by wbcode: replace arrepi=arrepi[_n-1]+1 if arrepi[_n-1]!=.& ndef!=.

* back to country-year order; every later year of an episode takes the number
* of the year before it, i.e. the number assigned to the episode's first year
tsset wbcode year
by wbcode: replace arrepi=arrepi[_n-1] if arrepi[_n-1]!=.& ndef!=.

* wbcode + 1000*arrepi is unique across episodes as long as wbcode is below 1000
gen cn_epi=wbcode+arrepi*1000 //unique identifier of each default episode



//Default episode dynamics
* create index of before, beginning, middle and end of the episode
* if a country has positive arrears in 1970, then the "before episode" period is treated as missing variables
* if a country has positive arrears in 2019, then the "after episode" are treated as missing variables
*
* Variables created here:
*   epilen    = event-time index: 1 in the year before an episode starts,
*               k+1 in the k-th year of the episode (missing elsewhere and in 2019)
*   cn_epi2   = episode identifier extended to also cover the year before the episode
*   total_len = largest epilen of the episode, i.e. episode length + 1
*   epilen_p  = previous year's epilen + 1; this is what reaches the first year
*               after the episode, where epilen itself is missing

tsset wbcode year
* next year's within-episode count; this gives 1 in the year before the start
by wbcode: gen epilen=ndef[_n+1]
* last year of an episode (next year not in default): own count + 1
by wbcode:  replace epilen=ndef+1 if year<2019 & epilen==.

* attach the year before the episode to the episode that follows it
by wbcode,  sort: gen cn_epi_t=cn_epi[_n+1] if  cn_epi[_n]==.
* option m(issing): keep cn_epi2 missing when both components are missing
egen cn_epi2=rowtotal(cn_epi_t cn_epi),m
by cn_epi2, sort: egen total_len=max(epilen)


tsset wbcode year
* FIX: the lag is now taken within country. Without the by-prefix the first
* row of a country read epilen from the last row of the previous country.
by wbcode: gen epilen_p=epilen[_n-1]+1
* year before the episode: there is no earlier event-time value to build on
by wbcode:  replace epilen_p=epilen if epilen==1 & epilen_p==. 
* episode already running in 1970: the first sample year is its first year
by wbcode:  replace epilen_p=epilen if epilen==2 & epilen_p==. & year==1970 

* After episode
* The condition below selects the year right after an episode's last default
* year: its epilen_p is one more than the previous year's total_len.
* Year 2019 is never used as an "after" year. The flag is a temporary variable.
tempvar post_epi
by wbcode: gen byte `post_epi' = epilen_p==(total_len[_n-1]+1) & epilen_p!=. & year != 2019

* Value of each series in that year, missing in all other years
gen debtafter = debtppggdp if `post_epi'
gen yafter    = ydt        if `post_epi'
gen cafter    = cdt        if `post_epi'
gen safter    = spread     if `post_epi'
gen pdafter   = partialdef if `post_epi'

* Value of each series at three points of the episode, missing elsewhere:
*   middle_* : the middle year            (epilen == middle)
*   beg_*    : "Before"    = the year before the episode starts (epilen == 1)
*   beg2_*   : "Beginning" = the first year of the episode      (epilen == 2)

* Middle position on the epilen scale
gen middle = ceil((total_len+1)/2)

* series to record and the suffix each one gets in the new variable names
local series   debtppggdp ydt cdt spread partialdef
local suffixes debt       y   c   spread pd
local n_series : word count `series'

foreach stage in middle beg beg2 {
	* epilen value that identifies this stage
	if "`stage'" == "middle" {
		local target middle
	}
	else if "`stage'" == "beg" {
		local target 1
	}
	else {
		local target 2
	}
	forvalues j = 1/`n_series' {
		local src : word `j' of `series'
		local sfx : word `j' of `suffixes'
		gen `stage'_`sfx' = `src' if epilen == `target' & epilen != .
	}
}



******************************
**** Figures
******************************

//* Fig 1:
// Partial default over time for two example countries
twoway (scatter partialdef year if countryname=="Argentina",          ///
            xtitle(year) yaxis(1) yscale(axis(1))),                   ///
       title("Defaults in Argentina") ytitle("Partial Default")       ///
       graphregion(color(white))
graph export new_partialhc_argentina.pdf, replace

twoway (scatter partialdef year if countryname=="Russian Federation", ///
            xtitle(year) yaxis(1) yscale(axis(1))),                   ///
       title("Defaults in Russia") ytitle("Partial Default")          ///
       graphregion(color(white))
graph export new_partialhc_russia.pdf, replace


//* Fig 2 - panel a
// default is partial: distribution of partial default over country-years above 1%
histogram partialdef if partialdef>0.01, fraction bin(40)             ///
       color(gs10) lcolor(white) graphregion(color(white))            ///
       xtitle("Partial Default")
graph export new_partial_hist.pdf, replace


//* Fig 2 - panel b
// default episode length: one observation per episode (cn_epi), the listed
// variables are averaged within the episode; the data in memory are restored afterwards
preserve
collapse (mean) length partialdef debtppggdp spread wbcode ydt (firstnm) country, by(cn_epi)
label var length `"Years"'
histogram length if length>0, fraction bin(20)                        ///
       color(gs10) lcolor(black) graphregion(color(white))
graph export new_length_hist.pdf, replace
restore




******************************
**** Tables
******************************

//* Tab 1 
* upper panel: Partial Default Frequency (same as tab4)
* A country-year is "in partial default" when partialdef exceeds 1%.
* The guard is on partialdef itself: in Stata a missing value compares as larger
* than any number, so a missing partialdef would otherwise be counted as a
* default. (The former guard PPG_arrears!=. never excluded anything, because
* PPG_arrears comes from egen rowtotal(), which returns 0 rather than missing.)
gen partialdefp=0
replace partialdefp=1 if partialdef>0.01 & !missing(partialdef)  //frequency
sort wbcode year
* within-country sd of partial default, computed over default years only and
* stored on those rows
bysort wbcode: egen stdPD=sd(partialdef) if partialdef>0.01 & !missing(partialdef)  //partial default SD

* partial default mean
tabstat partialdef if partialdef>0.01 & !missing(partialdef), stat(mean) column(statistics)  
* frequency and sd
tabstat partialdefp stdPD, stat(mean)  column(statistics)



* middle panel: Default Episodes
* one row per episode (cn_epi) holding the episode means; the full panel is
* restored afterwards
preserve
collapse length partialdef, by(cn_epi)
label variable length `"Years"' 
* short episode = at most 2 years
gen short_epi=.
replace short_epi=1 if length<=2 & length !=. 
tabstat length, statistic(mean)
* counts: number of short episodes and total number of episodes
tabstat short_epi cn_epi, statistic(count)
restore

* lower panel: Default Episode Dynamics
* means in the year before, the first year, the middle year and the year after an episode
tabstat beg_pd beg2_pd middle_pd pdafter, stat(mean)  column(statistics)   //Partial Default
tabstat beg_debt beg2_debt middle_debt debtafter, stat(mean)  column(statistics)  //Debt
tabstat beg_y beg2_y middle_y yafter, stat(mean)  column(statistics)  //Output


//* Tab 2
//  Default Groups: Statistics for spreads, partial default, detrended output, debt to output 
* quartiles of partial default among country-years with partial default above 1%
xtile pct4_pd= partialdef if partialdef>0.01, nq(4)
gen pd_g=pct4_pd
replace pd_g=0 if partialdef<0.01 
* merge the two middle quartiles into a single group
replace pd_g=2 if pct4_pd==3
* groups here 0=no default, 1<25pct =small pd, 2=25-75pd=med pdf, 4>75 pd= large default
* group means, stored on every row of the group
bysort pd_g: egen gpd_pd=mean(partialdef)
bysort pd_g: egen gpd_spr=mean(spread)
bysort pd_g: egen gpd_dgdp=mean(debtppggdp)
bysort pd_g: egen gpd_ydt=mean(ydt)

* One row per group, one column per statistic. tabstat is used instead of
* -table, contents()- because that syntax was removed from -table- in Stata 17
* and stops the do-file there; tabstat prints the same group means on every
* Stata version. nototal drops the pooled row, and rows with missing pd_g are
* left out, as -table- did.
tabstat gpd_pd gpd_spr gpd_dgdp gpd_ydt, by(pd_g) statistics(mean) nototal





//* Tab 4
// Every egen below stores a country-level statistic on each row of that
// country; tabstat then averages it over the country-year rows in memory.
* Within country standard deviations of:
sort wbcode year
bysort wbcode: egen stdDGDP=sd(debtppggdp)       //debt to output
bysort wbcode: egen stdDSGDP=sd(debtserv_gdp)    //debt service to output
bysort wbcode: egen stdspr=sd(spread)         //spreads

* mean and st dev of partial default, debt to output, debt service to output, debt due to output
* (partialdefp and stdPD are the variables created in the Tab 1 section of this file)
tabstat partialdef if partialdef>0.01& PPG_arrears!=. , stat(mean) column(statistics)  //partial default mean
tabstat partialdefp stdPD debtppggdp stdDGDP debtserv_gdp stdDSGDP payduegdp stdspr, stat(mean)  column(statistics)


*Within country autocorrelations for output
* cty = number of years with non-missing log GDP in the country
bysort wbcode: egen cty=count(ly)
* NOTE: this overwrites ydt in memory for countries with fewer than 5 GDP
* observations, so every later use of ydt in this file excludes them
replace ydt=. if cty<5

bysort wbcode: gen ydtlag = l1.ydt
* NOTE(review): egen's corr() function is presumably supplied by the
* user-written egenmore package (ssc install egenmore) rather than by
* official Stata - confirm it is installed before running
bysort wbcode: egen acordy1 = corr(ydt ydtlag)
bysort wbcode: egen stdGDP=sd(ydt)
tabstat acordy1 stdGDP, statistics(mean) column(statistics)


//Other Moments in Panel
* defaulted coupon to output given partial default
* (within-country mean and sd over default years only, stored on those rows)
bysort wbcode: egen marrgdp=mean(arrgdp) if partialdef>0.01& PPG_arrears!=. 
bysort wbcode: egen stdarrgdp=sd(arrgdp) if partialdef>0.01& PPG_arrears!=.
tabstat marrgdp stdarrgdp, statistics(mean) column(statistics)


* spreads and within country correlations
* (same egen corr() function as above)
bysort wbcode: egen corsprdy = corr(spread ydt)
bysort wbcode: egen corsprdgdp = corr(spread debtppggdp)
tabstat spread corsprdy corsprdgdp, statistics(mean) column(statistics)


*consumption sd relative to output
* the two means are reported side by side; the ratio itself is not computed here
bysort wbcode: egen stdc=sd(cdt)
bysort wbcode: egen stdy=sd(ydt)
tabstat stdc stdy, statistics(mean)



//* Tab 5 "data" (same as Tab 2)
* groups here 0=no default, 1<25pct =small pd, 2=25-75pd=med pdf, 4>75 pd= large default
* gpd_* are the group means by pd_g created in the Tab 2 section of this file.
* tabstat is used instead of -table, contents()- because that syntax was removed
* from -table- in Stata 17 and stops the do-file there; tabstat prints the same
* group means on every Stata version. nototal drops the pooled row, and rows
* with missing pd_g are left out, as -table- did.
tabstat gpd_pd gpd_dgdp gpd_spr gpd_ydt, by(pd_g) statistics(mean) nototal



//* Tab 6 "data" (same as Tab 1)
* Properties of episodes
* Work on one row per episode (cn_epi) holding the episode means of length and
* partial default; the full panel is restored afterwards.
preserve
collapse (mean) length partialdef, by(cn_epi)
label var length `"Years"'

* short episode: lasts at most 2 years (1 if short, missing otherwise)
generate short_epi = 1 if length <= 2 & !missing(length)

* coeff of variation: sd(length)/mean(length)*100
tabstat length short_epi, statistics(mean count sd)
* correlation of length and partial default
pwcorr length partialdef
restore

* Default Episode Dynamics
* means in the year before, the first year, the middle year and the year after an episode
tabstat beg_pd    beg2_pd    middle_pd    pdafter,   statistics(mean) columns(statistics)   //Partial Default
tabstat beg_y     beg2_y     middle_y     yafter,    statistics(mean) columns(statistics)   //Output
tabstat beg_debt  beg2_debt  middle_debt  debtafter, statistics(mean) columns(statistics)   //Debt
